{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Inductive Clustering\n\n\nClustering can be expensive, especially when our dataset contains millions\nof datapoints. Many clustering algorithms are not :term:`inductive` and so\ncannot be directly applied to new data samples without recomputing the\nclustering, which may be intractable. Instead, we can use clustering to then\nlearn an inductive model with a classifier, which has several benefits:\n\n- it allows the clusters to scale and apply to new data\n- unlike re-fitting the clusters to new samples, it makes sure the labelling\n  procedure is consistent over time\n- it allows us to use the inferential capabilities of the classifier to\n  describe or explain the clusters\n\nThis example illustrates a generic implementation of a meta-estimator which\nextends clustering by inducing a classifier from the cluster labels.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Authors: Chirag Nagpal\n#          Christos Aridas\nprint(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.base import BaseEstimator, clone\nfrom sklearn.cluster import AgglomerativeClustering\nfrom sklearn.datasets import make_blobs\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.utils.metaestimators import if_delegate_has_method\n\n\nN_SAMPLES = 5000\nRANDOM_STATE = 42\n\n\nclass InductiveClusterer(BaseEstimator):\n    def __init__(self, clusterer, classifier):\n        self.clusterer = clusterer\n        self.classifier = classifier\n\n    def fit(self, X, y=None):\n        self.clusterer_ = clone(self.clusterer)\n        self.classifier_ = clone(self.classifier)\n        y = self.clusterer_.fit_predict(X)\n        self.classifier_.fit(X, y)\n        return self\n\n    @if_delegate_has_method(delegate='classifier_')\n    def predict(self, X):\n        return self.classifier_.predict(X)\n\n    @if_delegate_has_method(delegate='classifier_')\n    def decision_function(self, X):\n        return self.classifier_.decision_function(X)\n\n\ndef plot_scatter(X,  color, alpha=0.5):\n    return plt.scatter(X[:, 0],\n                       X[:, 1],\n                       c=color,\n                       alpha=alpha,\n                       edgecolor='k')\n\n\n# Generate some training data from clustering\nX, y = make_blobs(n_samples=N_SAMPLES,\n                  cluster_std=[1.0, 1.0, 0.5],\n                  centers=[(-5, -5), (0, 0), (5, 5)],\n                  random_state=RANDOM_STATE)\n\n\n# Train a clustering algorithm on the training data and get the cluster labels\nclusterer = AgglomerativeClustering(n_clusters=3)\ncluster_labels = clusterer.fit_predict(X)\n\nplt.figure(figsize=(12, 4))\n\nplt.subplot(131)\nplot_scatter(X, cluster_labels)\nplt.title(\"Ward Linkage\")\n\n\n# Generate new samples and plot them along with the original dataset\nX_new, y_new = make_blobs(n_samples=10,\n                          centers=[(-7, -1), (-2, 4), (3, 6)],\n                          random_state=RANDOM_STATE)\n\nplt.subplot(132)\nplot_scatter(X, cluster_labels)\nplot_scatter(X_new, 'black', 1)\nplt.title(\"Unknown instances\")\n\n\n# Declare the inductive learning model that it will be used to\n# predict cluster membership for unknown instances\nclassifier = RandomForestClassifier(random_state=RANDOM_STATE)\ninductive_learner = InductiveClusterer(clusterer, classifier).fit(X)\n\nprobable_clusters = inductive_learner.predict(X_new)\n\n\nplt.subplot(133)\nplot_scatter(X, cluster_labels)\nplot_scatter(X_new, probable_clusters)\n\n# Plotting decision regions\nx_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\ny_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\nxx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),\n                     np.arange(y_min, y_max, 0.1))\n\nZ = inductive_learner.predict(np.c_[xx.ravel(), yy.ravel()])\nZ = Z.reshape(xx.shape)\n\nplt.contourf(xx, yy, Z, alpha=0.4)\nplt.title(\"Classify unknown instances\")\n\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}